/* This do-file extracts the relevant variables from the 2011 South African Census */

****************
** Census 2011** 
****************

* Run straight through without pausing for the -more- prompt.
set more off

* Working directory that holds the raw census files (fill in before running).
cd "" /*input directory here with raw data */


* Stack the two person-level files (split by province group, per the file names),
* then attach the household-level record to every person through the key SN.
use "sa-census-2011-person-prov-1to5-v1.2-20150825.dta", clear
append using "sa-census-2011-person-prov-6to9-v1.2-20150825.dta"
sort SN

* keep(master match) retains every person record and discards household records
* that match no person; nogenerate means no _merge indicator is left behind.
merge m:1 SN using "sa-census-2011-household-v1.1-20140618.dta", keep(master match) nogenerate


* Give the raw census variables readable names. The renames are independent of
* one another, so they are grouped here by theme.

* --- Identifiers ---
rename SN                          household_id
rename F00_NR                      id
rename P04_SPN                     spouse_id

* --- Demographics ---
rename F02_AGE                     age
rename P01_YEAR                    birth_year
rename F03_SEX                     sex
rename P03_MARITAL_ST              marital_status
rename P05_POP_GROUP               race
rename P09_CITIZENSHIP             south_africa_citizenship

* --- Geography ---
rename P_PROVINCE                  province
rename P_DISTRICT                  district
rename P_MUNIC                     municipality
rename P07_PROV_POB                province_birth

* --- Education ---
rename DERP_EDUCATIO~L             education
rename P20_EDULEVEL                edu_specific

* --- Labour market ---
rename DERP_EMPLOY_STATUS_OFFICIAL emp_status
rename P23A_EMPLOYMENTSTATUS       wage_worker
rename P23B_EMPLOYMENTSTATUS       self_employed
rename P23C_EMPLOYMENTSTATUS       unpaid_work
rename DERP_INDUSTRY               industry
rename P29A_INDUSTRY               ind_specific
rename DERP_OCCUPATION             occupation
rename OCCUP_LEV03                 occ_specific
rename OCCUP_LEV04                 occ_most_specific

* --- Income ---
rename P16_INCOME                  income_category
rename DERH_INCOME_CLASS           household_income

* --- Fertility / youngest child ---
rename P34_CHILDBORN~T             number_children
rename P38_LASTCHILD~Y             yc_birth_day
rename P38_LASTCHILDMO             yc_birth_month
rename P38_LASTCHILDYR             yc_birth_year
rename P40_LASTCHILDALIVE          child_alive

* --- Sampling weight ---
rename PERSON_10PER_WGT            weight



* Dummy equal to 1 when P17_SCHOOLATTEND is 1 and 0 for every other value.
* NOTE(review): a missing P17_SCHOOLATTEND also becomes 0 here -- confirm that is intended.
gen school_attend = cond(P17_SCHOOLATTEND==1, 1, 0)  /*Person attends any type of educational institution */


* Retain only the variables used downstream; everything else is dropped.
* The raw H* household variables are kept because the asset section below recodes them.
keep household_id id spouse_id ///
     age birth_year sex marital_status race ///
     education edu_specific school_attend ///
     industry occupation ind_specific occ_specific occ_most_specific ///
     emp_status wage_worker self_employed unpaid_work ///
     income_category household_income weight ///
     province province* district municipality ///
     yc_birth_day yc_birth_month yc_birth_year child_alive number_children ///
     H02_MAINDWELLING H13_REFRIDGERATOR H13_RADIO H13_TV H13_COMPUTER H13_CELLPHONE ///
     H04_TENURE H13A_INTERNET H03_TOTROOMS H07_WATERPIPED H11_ENERGY_LIGHTING H10_TOILET


***********************
/* Employment Status */
***********************

* status collapses emp_status into four groups:
*   emp_status equal to 1  gives 1, or 2 when self_employed==1 or unpaid_work==1
*   emp_status equal to 2  gives 3
*   emp_status equal to 3 or 4 gives 4
* Any other emp_status value, including missing, leaves status missing.
gen status = cond(self_employed==1 | unpaid_work==1, 2, 1) if emp_status==1 /* Includes people employed in household business as self-employed */
replace status = 3 if emp_status==2
replace status = 4 if inlist(emp_status, 3, 4)


label define status 1 "Employee" 2 "Self-Employed" 3 "Unemployed" 4 "Inactive"
label values status status


***********************
* Years of Education **
***********************

* Convert the detailed education code (edu_specific) into approximate years of schooling.
* Mapping applied by the recode below:
*   98                  becomes 0
*   0 through 12        become code + 1 (so Grade 0 counts as 1 year, per the variable label)
*   13, 14, 15          become 11, 12, 13
*   19 and 20           become 12
*   16-18 and 21-28     become 14 (top category, per the variable label)
*   29                  becomes missing
* The rules are written so that no two of them match the same code: the original
* listed both (19=12) and (16/19=14), leaving the treatment of code 19 dependent on
* rule precedence. Code 19 is kept at 12, as the explicit rule states.
gen yrs_schooling=edu_specific

recode yrs_schooling (98=0) ///
    (0=1) (1=2) (2=3) (3=4) (4=5) (5=6) (6=7) (7=8) (8=9) (9=10) (10=11) (11=12) (12=13) ///
    (13=11) (14=12) (15=13) ///
    (19 20=12) ///
    (16/18=14) (21 22=14) (23/28=14) ///
    (29=.)
label var yrs_schooling "Years of Schooling (Careful: Grade 0=1, and 14 indicates any post high school studies)"

* Any code not handled above (and any value outside 0-14) is set to missing.
replace yrs_schooling=. if yrs_schooling<0 | yrs_schooling>14


*******************
* Household Assets*
*******************

* House Type *

* 1 when H02_MAINDWELLING equals 1, 0 for any other recorded code.
* The indicator is left missing when H02_MAINDWELLING is missing (for example, a
* person with no matched household record), so unknown cases are not labelled "Other".
* This matches how the H13_* asset dummies treat missing source values.
gen house_type=(H02_MAINDWELLING==1) if !missing(H02_MAINDWELLING)
label define house_type 0 "Other" 1 "House or Brick Structure"
label values house_type house_type
drop H02_MAINDWELLING


** Fridge, Radio, TV, Computer, Cell Phone **

* Each H13_* source variable becomes a dummy: 1 when the source is 1, 0 when the
* source is 2, and missing for any other value. The source is dropped afterwards.
* The two lists are parallel: the i-th source feeds the i-th new variable.
local asset_src H13_REFRIDGERATOR H13_RADIO H13_TV H13_COMPUTER H13_CELLPHONE
local asset_new fridge radio tv computer cell_phone

forvalues k = 1/5 {
    local raw : word `k' of `asset_src'
    local dum : word `k' of `asset_new'
    gen `dum' = cond(`raw'==1, 1, cond(`raw'==2, 0, .))
    drop `raw'
}

** House Tenure **

* Reorder the tenure codes to match the value labels defined below:
* source 4 becomes 1, source 1 becomes 3, source 3 becomes 4; codes 2 and 5 are unchanged.
rename H04_TENURE hh_tenure
recode hh_tenure (1=3) (3=4) (4=1)
label define hh_tenure 1 "Owned and Fully Paid" 2 "Owned but not yet Fully Paid" 3 "Rented" 4 "Occupied Rent free" 5 "Other"
label values hh_tenure hh_tenure

** Number of House rooms **

rename H03_TOTROOMS hh_rooms

** Internet **

* Copies source codes 1 and 2 unchanged; every other value becomes missing.
* NOTE(review): unlike the 1/0 asset dummies, this variable takes the values 1 and 2 --
* confirm the meaning of the H13A_INTERNET codes against the codebook.
gen internet = H13A_INTERNET if inlist(H13A_INTERNET, 1, 2)
drop H13A_INTERNET

** Water in dwelling **

* 1 when H07_WATERPIPED equals 1, 0 for any other recorded code. Left missing when
* the source is missing, so unknown cases are not counted as "no connection".
gen hh_water=(H07_WATERPIPED==1) if !missing(H07_WATERPIPED)
drop H07_WATERPIPED
label var hh_water "Dwelling with water connection"


** Electricity **

* 1 when H11_ENERGY_LIGHTING equals 1, 0 for any other recorded code; missing stays missing.
gen electricity=(H11_ENERGY_LIGHTING==1) if !missing(H11_ENERGY_LIGHTING)
drop H11_ENERGY_LIGHTING
label var electricity "Electricity source for lighting"

** Toilet **

* 1 when H10_TOILET is 1, 2 or 3, 0 for any other recorded code; missing stays missing.
gen h_toilet=inlist(H10_TOILET, 1, 2, 3) if !missing(H10_TOILET)
label var h_toilet "1 if toilet, 0 if latrine or nothing"

drop H10_TOILET


*******************
/*Household Size */
*******************

* household_size is the number of person records in this file that share a household_id.
* egen total() is the documented name of the function formerly called sum(); the
* result is the same. The constant helper ind is kept so the saved file has the
* same variables as before.
gen ind=1
bys household_id: egen household_size=total(ind)


gen year=2011 /* year variable */


* Write the extract to the current working directory, overwriting any existing copy.
save south_africa_census_2011_wber.dta, replace
